* Project root: every relative path below is resolved against this directory.
cd "/Users/mmoral/Dropbox/Sabancı University/-ONGOING/On the Relationship between Party Polarization and Citizen Polarization"

** Merging the EES, CSES, and NES Datasets
* One-off import of the corrected party list (left commented out; the .dta it produced is merged below).
*import excel "Political Parties.xlsx", sheet("Stata Corrected") firstrow case(lower) clear
*saveold "Stata Corrected_All.dta", replace

* Stack the three survey-based files; study/party/partyname are dropped here
* before the merge with the corrected party list below.
use "EES/ees_merged v2.dta", clear
append using "CSES/cses_merged v2.dta"
append using "NES/nes_merged v2.dta"
drop study party partyname

* Malta and Northern Ireland are not covered in the CMP data. 33 obs deleted.
drop if inlist(iso, 1470, 18261)
* Bulgaria, Croatia, Cyprus, Czech Republic, East Germany, Estonia, Hungary, Latvia, Lithuania,
* Poland, Romania, Slovakia, Slovenia do not have sufficient observations. 224 obs deleted.
drop if inlist(iso, 1100, 1191, 1196, 1203, 12761, 1233, 1348, 1428, 1440, 1616, 1642, 1703, 1705)

* Attach the corrected party list (one row per pid-iso-date).
merge 1:1 pid iso date using "Stata Corrected_All.dta"
sort iso date pid
* Master-only rows have no study code: borrow it from a neighbouring row of the same
* country-election. The forward fill cascades down the data in one pass; the backward
* fill moves a value up by only one row per pass, so it is run twice.
replace study=study[_n-1] if _merge==1 & missing(study) & date==date[_n-1] & iso==iso[_n-1]
forvalues pass = 1/2 {
	replace study=study[_n+1] if _merge==1 & missing(study) & date==date[_n+1] & iso==iso[_n+1]
}
tabulate study if _merge==1
* 184 unmatched: 113 from the EES, 25 from the CSES(w/ no party code), 46 from the NES("Data Append_NES v14")
drop _merge

* Attach the Manifesto Project (CMP) party-level data and use it to fill gaps in
* vote share, seat share and party family.
merge m:1 date countryname party using "/Users/mmoral/Dropbox/Sabancı University/-ONGOING/On the Relationship between Party Polarization and Citizen Polarization/MPDataset_Old/MPDataset 2019b/MPDataset_MPDS2019b_stata14.dta", force
sort _merge countryname date pid
replace vote_share=pervote if missing(vote_share) & !missing(pervote)
replace seat_share=(absseat*100)/totseats if missing(seat_share) & !missing(absseat)
replace parfam_nonmiss=parfam if missing(parfam_nonmiss) & !missing(parfam)
* 3446 parties from unexamined elections and/or countries in the CMP data
drop if _merge==2
* 29 electoral coalitions and non-legislative parties w/ no seat share in the CMP data or in new elections (See "All parties v15.xlsx /Dropped Seat=0")
drop if seat_share==0
* 189 unmatched parties w/ no vote share in the CMP data.
drop if missing(vote_share)
* Positional range: removes `country` plus everything stored from oecdmember through _merge.
drop country oecdmember-_merge
sort countryname date pid

* One panel id per country-election.
egen panel = group(iso date)
order countryname iso study date panel turnout_sample pid party partyname left parfam_nonmiss vote_share seat_share, first
format partyname %25s
* 1046 (237) parties in 180 elections in total: 5.81 parties per election, 9 elections per country, 20 countries (1000, 174 - 5.75, 9.16, 20 in the effective sample)
distinct panel

** Polarization Measures
* Number and Effective Number of Parties
* sumvote: total vote share of the parties retained in each election (panel).
* pidN:    number of party rows in each election.
by panel, sort: egen sumvote=total(vote_share)
by panel, sort: gen pidN=_N

* Rescale vote shares so that they sum to one within each election, then compute
* the effective number of parties as 1 / sum of squared (rescaled) vote shares.
replace vote_share=vote_share*(1/sumvote)
gen votesq=vote_share^2
* Spell out `panel` in full: the former `by pan` relied on variable-name
* abbreviation, which fails under -set varabbrev off- or as soon as another
* variable beginning with "pan" exists.
by panel, sort: egen sumvotesq=total(votesq)
gen enep=1/sumvotesq

* Displacement from mean party stand
* meanps:  unweighted mean of party positions (left) within each election.
* wmeanps: vote-share-weighted mean position (vote_share * left summed within election).
by panel, sort: egen meanps=mean(left)
gen wem=vote_share*left
by panel, sort: egen wmeanps=total(wem)

* meanl / meanr: mean position of the parties strictly to the left / right of the
* unweighted mean, computed per election in a single by-group pass.
* The previous per-panel -summarize- loop aborted with a syntax error whenever a
* panel had no party on one side of the mean (-summarize- stores no r(mean) when
* it uses zero observations); here such panels simply receive a missing value.
by panel, sort: egen meanl=mean(cond(left<meanps, left, .))
by panel, sort: egen meanr=mean(cond(left>meanps & !missing(left), left, .))

* polv: distance between the average right-of-mean and left-of-mean positions.
gen polv=meanr-meanl

* Distance-based measures
* Rank parties within each election by vote share (rank 1 = largest party).
gsort panel -vote_share
bysort panel: egen vrank=rank(-vote_share)

* vrank1 / vrank2: position (left) of the largest / second-largest party, spread
* to every row of the election.
* Replaces a per-panel -summarize- loop that (a) contained an unused
* -summarize vrank- call and (b) aborted with a syntax error when no row had
* rank 1 or 2. NOTE: -egen rank()- assigns average ranks to ties, so an
* exact tie for first place yields ranks of 1.5 and a missing sartori value.
by panel: egen vrank1=mean(cond(vrank==1, left, .))
by panel: egen vrank2=mean(cond(vrank==2, left, .))

* sartori: absolute distance between the two largest parties.
gen sartori=abs(vrank1-vrank2)

* Dispersion
* Unweighted standard deviation of party positions per election (Ezrow 2007, Kim et al. 2010).
bysort panel: egen uwedis = sd(left)

* Weighted standard deviation of party positions around the weighted mean (wmeanps):
*   dif2 = vote-share-weighted squared deviation of each party
*   tot2 = their sum within the election
*   mwe  = mean vote share within the election
*   tot3 = tot2 / ((number of parties - 1) * mean vote share)
gen dif2 = (vote_share)*(left-wmeanps)^2
bysort panel: egen tot2 = total(dif2)
bysort panel: egen mwe = mean(vote_share)
gen tot3 = tot2/((pidN-1)*mwe)
* Also see sum of weighted and unweighted (squared) party distances from the weighted mean party stand
* (Lachat 2008, Lachat 2010, Lupu 2015, Hazan 1995, Pardos-Prado and Dinas 2010, Fazekas and Meder 2013,
* Tavits and Letki 2014, Pardos-Prado 2012)
gen wedis = sqrt(tot3)
* Displays the analytically weighted SD of left per election; creates no variables.
tabstat left [aw=vote_share], stat(sd) by(panel)

** Polarization Index (Dalton 2008, Wang 2012, Dejaeghere and Dassonneville 2017)
* Per party: 100 * vote share * ((position - weighted mean position) / 5)^2.
gen diff3 = 100*vote_share*((left-wmeanps)/5)^2
* Sum over the parties of each election, then take the square root.
bysort panel: egen pi = total(diff3)
replace pi = sqrt(pi)

** Polarization Score (Esteban and Ray 1994, Oosterwal and Torenvlied 2010, Maoz and Somer-Topcu 2010, Rehm and Reilly 2010, Indridason 2011, Han 2015)
* Run -er- once per election on vote-share-weighted party positions and store the
* returned r(er_1) on every row of that election.
* NOTE(review): -er- is not an official Stata command; presumably a user-written
* Esteban-Ray routine that must be installed -- confirm.
gen estray = .
levelsof panel, local(panel_ids)
foreach id of local panel_ids {
	er left [aw=vote_share] if panel==`id', normalize(none) a(1)
	replace estray = `r(er_1)' if panel==`id'
}

*order countryname iso date panel study pid party partyname parfam resleft* rescenter medianv left vote_share seat_share, first
* Reduce the party-level data to one row per election (panel): first non-missing
* country name and study, and the within-election mean of every measure.
collapse (firstnm) countryname study (mean) pidN enep wedis uwedis polv pi estray sartori resleft* date iso, by(panel)
order countryname iso date panel study, first

* Other Variables
* Number of Elections
* totelec: number of elections per country; elec: running election index within country.
* The data must be ordered by date inside each country before numbering:
* -by iso, sort:- sorts on iso alone and Stata's sort does not preserve the
* order of tied observations, so elec was not guaranteed to be chronological
* even though it is later used as the time variable in -xtset iso elec-.
bysort iso (date): gen totelec=_N
by iso (date): gen elec=_n

* Gaps
* gap marks elections w/ gaps: 0 = none, 1 = gap, 2 = two-elections-long gap.
gen gap = 0
* All Belgian elections, plus every election before the listed cut-off date (YYYYMM) in the other countries.
replace gap = 1 if iso==1056
replace gap = 1 if (iso==1040 & date<200211) | (iso==1208 & date<199409) | (iso==1246 & date<199903) | ///
	(iso==1276 & date<198303) | (iso==1300 & date<200004) | (iso==1380 & date<199403) | ///
	(iso==1528 & date<198105) | (iso==1620 & date<200203)
* 2 elections-long gap (applied last so that it overrides the gap==1 code for Greece).
replace gap = 2 if iso==1300 & date<199310

* Belgium has only 2 consecutive elections between 1985 and 1995, is therefore dropped.
* Greece (198906) is dropped due to a gap of two elections.
drop if iso==1056 | gap==2

* Counts with and without the gap elections; the data are restored afterwards.
* 174 elections w/ a total of 12 gaps in 19 countries, 9.2 on average, 8.5 wo/ gaps on average
distinct elec countryname
preserve
	drop if gap==1
	distinct elec countryname
restore

** Other Independent Variables
* Democratic Electoral Systems around the World, 1946-2016 Dataset
/* use "Democratic Electoral Systems Around the World, 1946-2016/es_data-v3.dta", clear
keep if aclp_code==129 | aclp_code==101 | aclp_code==102 | aclp_code==105 | aclp_code==106 | aclp_code==107 | aclp_code==108 | aclp_code==191 | aclp_code==110 | aclp_code==113 | aclp_code==114 | ///
aclp_code==115 | aclp_code==117 | aclp_code==131 | aclp_code==118 | aclp_code==120 | aclp_code==122 | aclp_code==123 | aclp_code==124 | aclp_code==126 | aclp_code==65
recode aclp_code (191=108)
drop if presidential==1

rename date date2
destring month, replace
gen date = 100*year+month

recode aclp_code (129=1036) (101=1040) (102=1056) (105=1208) (106=1246) (107=1250) (108=1276) (110=1300) (113=1372) (114=1380) (115=1442) (117=1528) (131=1554) (118=1578) (120=1620) (122=1724) (123=1752) (124=1756) (126=1826) (65=1840), gen(iso)
lab define iso 1036 "Australia" 1040 "Austria" 1056 "Belgium" 1208 "Denmark" 1246 "Finland" 1250 "France" 1276 "Germany" 1300 "Greece" 1372 "Ireland" 1380 "Italy" 1442 "Luxembourg" ///
1528 "Netherlands" 1554 "New Zealand" 1578 "Norway" 1620 "Portugal" 1724 "Spain" 1752 "Sweden" 1756 "Switzerland" 1826 "United Kingdom" 1840 "United States", replace
lab val iso iso
order country iso date, first

levelsof country, local(levels)
foreach l of local levels {
disp "**`l'**" 
list date year if country=="`l'"
}
replace date=197103 if date==197104 & iso==1528
save "es_date-v3_v2.dta", replace */

* Attach the electoral-system data (one row per country-election) to the election-level data.
merge 1:1 iso date using "/Users/mmoral/Dropbox/Sabancı University/-ONGOING/On the Relationship between Party Polarization and Citizen Polarization/es_date-v3_v2.dta", force
* 7 missing elections in Netherlands (1971), Austria, France, Germany, Netherlands, United Kingdom (2017), and Italy (2018).
drop if _merge==2

* Build the two covariates that are kept: pr_rule (legislative_type recoded 2->0, 3->1, 1->2)
* and av_dismag (copy of tier1_avemag).
destring legislative_type tier1_avemag, replace
recode legislative_type (2=0) (3=1) (1=2), gen(pr_rule)
gen av_dismag = tier1_avemag
* Positional range: removes everything stored from country through _merge.
drop country-_merge

* Fill elections without electoral-system data from the adjacent election of the same country.
sort iso date
foreach v in av_dismag pr_rule {
	* 2017 and 2018 elections are recoded according to previous elections.
	replace `v' = `v'[_n-1] if iso==iso[_n-1] & missing(`v')
	* Netherlands (1971) recoded according to Netherlands (1972)
	replace `v' = `v'[_n+1] if iso==iso[_n+1] & missing(`v')
}

* Compulsory Voting
* 1 for the five listed countries, 0 for all others ...
gen compulsory = inlist(countryname, "Australia", "Austria", "Greece", "Italy", "Luxembourg")
* ... and switched back to 0 for elections after the listed cut-off dates (YYYYMM) in Austria, Greece and Italy.
replace compulsory = 0 if (countryname=="Austria" & date>200410) | ///
	(countryname=="Greece" & date>200000) | ///
	(countryname=="Italy" & date>199300)

* Other L-R Scales
* other_scale codes the left-right scale behind each election's data (see the value
* labels defined below). The statements are order-dependent: later lines overwrite
* earlier ones, so study-wide rules override country-specific rules placed above them.
gen other_scale=1 if countryname=="United Kingdom" & date==198306
replace other_scale=2 if countryname=="Netherlands" & inlist(date, 197104, 197211)
replace other_scale=3 if countryname=="Spain" & inlist(date, 198906, 201512, 201606)
replace other_scale=3 if countryname=="Netherlands" & inlist(date, 198105, 198209, 198909)
replace other_scale=3 if countryname=="Norway" & inlist(date, 198509, 198909, 199309)
* "EES1999" was listed twice in this inlist(); the redundant copy is removed (same result).
* NOTE(review): confirm that the duplicate was not a typo for another EES wave.
replace other_scale=3 if inlist(study, "EES1989", "EES1994", "EES1999", "EES2004")
replace other_scale=0 if countryname=="Sweden" & study=="EES2004"
replace other_scale=4 if countryname=="Austria" & date==200610
replace other_scale=4 if countryname=="Germany" & inlist(date, 197610, 198303, 199012)
replace other_scale=4 if study=="EES2014"
replace other_scale=5 if countryname=="New Zealand" & inlist(date, 199010, 199311)
replace other_scale=5 if countryname=="United States"
replace other_scale=6 if countryname=="Finland" & date==199103
replace other_scale=6 if countryname=="Norway" & inlist(date, 197709, 198109)
* Everything not coded above uses the default scale (code 0).
replace other_scale=0 if missing(other_scale)

lab def scales 0 "0/10" 1 "-10/10" 2 "0/6" 3 "1/10" 4 "1/11" 5 "1/7" 6 "1/9"
lab val other_scale scales
* Dummy: 0 = default scale, 1 = any other scale.
recode other_scale (0=0) (else=1), gen(other_scale_d)

* Election Date
* date arrives as YYYYMM. Build an exact month counter (12*year + month) before
* date is rescaled; it is used below for the time between elections.
gen long ym_index=12*floor(date/100)+mod(date, 100)
* Rescale date to YYYY.MM; rounding recovers the year because the fractional part never exceeds .12.
replace date=date/100
gen year=round(date)

* Election Decade
egen decade=cut(year), at(1970(10)2020) icodes lab

* Time between Elections
* mdif = months since the country's previous election in the data (missing for the first one).
* The former 100*mod(D.date, .88) only recovered the month count for intervals
* shorter than 88 months (e.g. 96 months came out as 8) and carried floating-point
* noise; differencing the month counter is exact for any interval.
xtset iso elec
gen mdif=D.ym_index
drop ym_index

* First-differenced and Lagged Forms
* For each listed series x: lx = value at the previous election (per -xtset-),
* delx = current value minus lx.
local dynvars resleft* wedis uwedis polv pi estray sartori enep av_dismag
foreach v of varlist `dynvars' {
	gen l`v' = L.`v'
	gen del`v' = `v' - l`v'
}

* Variable and Value Labels
* Identifiers
label variable countryname "Country Name"
label variable panel "Panel ID"
label variable study "Source"
label variable iso "ISO Code"
* Party-system polarization measures
label variable wedis "Wgh. Standard Deviation of Party Positions"
label variable uwedis "Standard Deviation of Party Positions"
label variable polv "Distance bw. Average Left-right Positions"
label variable pi "Dalton Polarization Index"
label variable estray "Esteban and Ray Polarization Score"
label variable sartori "Distance bw. Largest Parties"
label variable pidN "Number of Parties"
label variable enep "Effective Number of Parties"
* Citizen polarization, overall and by respondent subgroup
label variable resleft "Citizen Polarization (All Respondents)"
label variable resleftvot "Citizen Polarization (Voters)"
label variable resleftabs "Citizen Polarization (Non-voters)"
label variable resleftsop3 "Citizen Polarization (Low Sophistication)"
label variable resleftsop2 "Citizen Polarization (Med. Sophistication)"
label variable resleftsop1 "Citizen Polarization (High Sophistication)"
label variable resleftpkn "Citizen Polarization (Knowledgeable)"
label variable resleftpuk "Citizen Polarization (Unknowledgeable)"
label variable reslefteduch "Citizen Polarization (High Education)"
label variable reslefteducl "Citizen Polarization (Low Education)"
label variable resleftsop_alt4 "Citizen Polarization (Lowest Sophistication)"
label variable resleftsop_alt3 "Citizen Polarization (Med.-Low Sophistication)"
label variable resleftsop_alt2 "Citizen Polarization (Med.-High Sophistication)"
label variable resleftsop_alt1 "Citizen Polarization (Highest Sophistication)"
* Election-level covariates
label variable date "Election Date"
label variable totelec "Number of Elections"
label variable elec "Election ID"
label variable gap "Election/Country w/ Gap"
label variable pr_rule "Legislative Type"
label variable av_dismag "Average District Magnitude"
label variable year "Election Year"
label variable decade "Election Decade"
label variable compulsory "Compulsory Voting"
label variable other_scale_d "Other LR Scale"
label variable mdif "Time bw. Elections (in Months)"

* Derive the labels of the lagged (l*) and differenced (del*) variables from the
* label of the underlying series.
foreach v of varlist resleft* wedis uwedis polv pi estray sartori enep av_dismag {
	local base : variable label `v'
	label variable l`v' "`base'_{t-1}"
	label variable del`v' "{&Delta} `base'"
}

* Country names for the ISO codes kept in the final data.
label define iso 1036 "Australia" 1040 "Austria" 1208 "Denmark" 1246 "Finland" 1250 "France" 1276 "Germany" 1300 "Greece" 1372 "Ireland" 1380 "Italy" 1442 "Luxembourg" ///
1528 "Netherlands" 1554 "New Zealand" 1578 "Norway" 1620 "Portugal" 1724 "Spain" 1752 "Sweden" 1756 "Switzerland" 1826 "United Kingdom" 1840 "United States", replace
label values iso iso

* Write the election-level replication file in an older .dta format.
saveold "Replication Data.dta", replace
